Contents

## Loading required package: limma
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
## Loading required package: Biobase
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:limma':
## 
##     plotMA
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     Filter, Find, Map, Position, Reduce, anyDuplicated, append,
##     as.data.frame, basename, cbind, colnames, dirname, do.call,
##     duplicated, eval, evalq, get, grep, grepl, intersect,
##     is.unsorted, lapply, mapply, match, mget, order, paste, pmax,
##     pmax.int, pmin, pmin.int, rank, rbind, rownames, sapply,
##     setdiff, sort, table, tapply, union, unique, unsplit, which,
##     which.max, which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
## -- Attaching packages ------------------------------ tidyverse 1.2.1 --
## v ggplot2 3.2.1     v purrr   0.3.2
## v tibble  2.1.3     v dplyr   0.8.2
## v tidyr   0.8.3     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts --------------------------------- tidyverse_conflicts() --
## x ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## x dplyr::combine()    masks Biobase::combine(), BiocGenerics::combine()
## x dplyr::filter()     masks stats::filter()
## x dplyr::lag()        masks stats::lag()

1 Introduction

Paired-end sequencing was performed on primary cultures from parathyroid tumors of 4 patients at 2 time points over 3 conditions (control, treatment with diarylpropionitrile (DPN) and treatment with 4-hydroxytamoxifen (OHT)). DPN is a selective estrogen receptor agonist and OHT is a selective estrogen receptor modulator. One sample (patient 4, 24 hours, control) was omitted by the paper authors due to low quality. Data, the count table and information on the experiment is available at http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE37211.

2 Count data and meta data

file<-"https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE37211&format=file&file=GSE37211%5Fcount%5Ftable%2Etxt%2Egz"
countTable<-file %>% url %>% gzcon %>% readLines %>% textConnection %>% read.table(.,sep="\t",header=TRUE)
head(countTable)
##                  X1  X2  X3  X4  X5  X6  X7  X8   X9 X10  X11 X12 X13 X14
## ENSG00000000003 673 916 391 830 474 734 370 325 1140 419 1124 393 293 356
## ENSG00000000005   4   1   2   3   2   1   0   1    0   0    0   0   0   0
## ENSG00000000419 239 211 132 209 142 170 216 154  465 196  514 203 203 216
## ENSG00000000457 144 181  88 145  73 107 214 165  496 174  522 204 117 114
## ENSG00000000460 364 188 196 200 213 162 535 184 1461 256 1531 224 181  96
## ENSG00000000938   3   7   2   5   1   3  12  23   41  13   33  12   6   3
##                 X15 X16 X17 X18 X20 X21 X22 X23 X24
## ENSG00000000003 537 621 787 321 396 372 666 334 674
## ENSG00000000005   0   0   1   0   0   0   0   0   0
## ENSG00000000419 309 355 466 222 211 201 308 176 402
## ENSG00000000457 180 242 315 126 165 141 251 134 297
## ENSG00000000460 358 271 699 159  86 314 219 297 323
## ENSG00000000938   9  17  14   6   6   7  17   8  16

2.1 Get info on samples

Get all info from GEO. get sample info via getGEO (info from samples)

gse<-getGEO("GSE37211")
## Found 1 file(s)
## GSE37211_series_matrix.txt.gz
## Parsed with column specification:
## cols(
##   .default = col_character()
## )
## See spec(...) for full column specifications.
## File stored at:
## /var/folders/p1/3js9hvbs473g1klcmqm0d8wm0000gn/T//Rtmp5Zb3Mm/GPL11154.soft
pdata<-pData(gse[[1]])
target<-pdata[,grep(colnames(pdata),pattern=":ch1")]
target
##           agent:ch1 patient:ch1 time:ch1        tissue:ch1
## GSM913873   Control           1      24h Parathyroid tumor
## GSM913874   Control           1      48h Parathyroid tumor
## GSM913875       DPN           1      24h Parathyroid tumor
## GSM913876       DPN           1      48h Parathyroid tumor
## GSM913877       OHT           1      24h Parathyroid tumor
## GSM913878       OHT           1      48h Parathyroid tumor
## GSM913879   Control           2      24h Parathyroid tumor
## GSM913880   Control           2      48h Parathyroid tumor
## GSM913881       DPN           2      24h Parathyroid tumor
## GSM913882       DPN           2      48h Parathyroid tumor
## GSM913883       OHT           2      24h Parathyroid tumor
## GSM913884       OHT           2      48h Parathyroid tumor
## GSM913885   Control           3      24h Parathyroid tumor
## GSM913886   Control           3      48h Parathyroid tumor
## GSM913887       DPN           3      24h Parathyroid tumor
## GSM913888       DPN           3      48h Parathyroid tumor
## GSM913889       OHT           3      24h Parathyroid tumor
## GSM913890       OHT           3      48h Parathyroid tumor
## GSM913891   Control           4      48h Parathyroid tumor
## GSM913892       DPN           4      24h Parathyroid tumor
## GSM913893       DPN           4      48h Parathyroid tumor
## GSM913894       OHT           4      24h Parathyroid tumor
## GSM913895       OHT           4      48h Parathyroid tumor
colnames(target)<-sub(":ch1","",colnames(target))
target<-apply(target,2,as.factor) %>% as.data.frame

3 Data analysis

3.1 Count object

dge <- DGEList(counts=countTable)
dge$sample
##     group lib.size norm.factors
## X1      1  6940755            1
## X2      1  8180762            1
## X3      1  4176780            1
## X4      1  7412702            1
## X5      1  4537507            1
## X6      1  6115870            1
## X7      1  6473767            1
## X8      1  5264925            1
## X9      1 16470386            1
## X10     1  6302823            1
## X11     1 16146909            1
## X12     1  6230264            1
## X13     1  5881155            1
## X14     1  6452338            1
## X15     1  9613620            1
## X16     1 12316913            1
## X17     1 16944429            1
## X18     1  6221756            1
## X20     1  6176208            1
## X21     1  5756955            1
## X22     1 10392039            1
## X23     1  5029131            1
## X24     1 11935997            1

3.2 Design

There can be an effect of agent, time interaction and agent x time interaction. We also expect blocking for patient. We can assess all effects of interest within patient.

design <- model.matrix(~agent*time+patient,target)
rownames(design) = colnames(dge)
design
##     (Intercept) agentDPN agentOHT time48h patient2 patient3 patient4
## X1            1        0        0       0        0        0        0
## X2            1        0        0       1        0        0        0
## X3            1        1        0       0        0        0        0
## X4            1        1        0       1        0        0        0
## X5            1        0        1       0        0        0        0
## X6            1        0        1       1        0        0        0
## X7            1        0        0       0        1        0        0
## X8            1        0        0       1        1        0        0
## X9            1        1        0       0        1        0        0
## X10           1        1        0       1        1        0        0
## X11           1        0        1       0        1        0        0
## X12           1        0        1       1        1        0        0
## X13           1        0        0       0        0        1        0
## X14           1        0        0       1        0        1        0
## X15           1        1        0       0        0        1        0
## X16           1        1        0       1        0        1        0
## X17           1        0        1       0        0        1        0
## X18           1        0        1       1        0        1        0
## X20           1        0        0       1        0        0        1
## X21           1        1        0       0        0        0        1
## X22           1        1        0       1        0        0        1
## X23           1        0        1       0        0        0        1
## X24           1        0        1       1        0        0        1
##     agentDPN:time48h agentOHT:time48h
## X1                 0                0
## X2                 0                0
## X3                 0                0
## X4                 1                0
## X5                 0                0
## X6                 0                1
## X7                 0                0
## X8                 0                0
## X9                 0                0
## X10                1                0
## X11                0                0
## X12                0                1
## X13                0                0
## X14                0                0
## X15                0                0
## X16                1                0
## X17                0                0
## X18                0                1
## X20                0                0
## X21                0                0
## X22                1                0
## X23                0                0
## X24                0                1
## attr(,"assign")
## [1] 0 1 1 2 3 3 3 4 4
## attr(,"contrasts")
## attr(,"contrasts")$agent
## [1] "contr.treatment"
## 
## attr(,"contrasts")$time
## [1] "contr.treatment"
## 
## attr(,"contrasts")$patient
## [1] "contr.treatment"

3.3 Filtering

keep <- filterByExpr(dge, design)
 dge <- dge[keep, , keep.lib.sizes=FALSE]

3.4 Normalisation

dge <- calcNormFactors(dge)
dge$samples
##     group lib.size norm.factors
## X1      1  6928344    0.9991826
## X2      1  8166370    0.9871519
## X3      1  4169707    0.9777048
## X4      1  7399280    1.0056375
## X5      1  4529364    0.9772584
## X6      1  6105569    0.9891050
## X7      1  6462881    0.9677161
## X8      1  5256022    0.9647628
## X9      1 16438721    1.0043510
## X10     1  6292354    0.9620275
## X11     1 16116791    0.9946148
## X12     1  6219897    0.9599718
## X13     1  5868039    1.0277969
## X14     1  6437868    1.0050856
## X15     1  9592029    1.0358842
## X16     1 12288733    1.0146357
## X17     1 16904257    1.0421481
## X18     1  6208329    0.9919888
## X20     1  6163365    1.0108964
## X21     1  5745207    1.0209375
## X22     1 10369491    1.0284136
## X23     1  5018532    1.0154103
## X24     1 11909919    1.0238401

3.5 Data exploration

An MDS plot shows the leading fold changes (differential expression) between the 23 samples.

plotMDS(dge,labels=paste(target$agent,target$time,target$patient,sep="-"),col=as.double(target$agent))

There is a very strong patient effect!

3.6 Parameter estimation

dge <- estimateDisp(dge, design, robust=TRUE)  
plotBCV(dge)

fit <- glmFit(dge,design)

3.7 Contrasts

L <- matrix(0,ncol=ncol(design),nrow=9)
colnames(L)<-colnames(design)
L[1,"agentDPN"]<-1
L[2,"agentOHT"]<-1
L[3,]<-L[2,]-L[1,]
L[4,grep(colnames(L),pattern="DPN")]<-1
L[5,grep(colnames(L),pattern="OHT")]<-1
L[6,]<-L[5,]-L[4,]
L[7,"agentDPN:time48h"]<-1
L[8,"agentOHT:time48h"]<-1
L[9,]<-L[8,]-L[7,]
rownames(L)<-c(paste("early",c("DPN-C","OHT-C","OHT-DPN")),paste("late",c("DPN-C","OHT-C","OHT-DPN")),paste("inter",c("DPN-C","OHT-C","OHT-DPN")))
L
##               (Intercept) agentDPN agentOHT time48h patient2 patient3
## early DPN-C             0        1        0       0        0        0
## early OHT-C             0        0        1       0        0        0
## early OHT-DPN           0       -1        1       0        0        0
## late DPN-C              0        1        0       0        0        0
## late OHT-C              0        0        1       0        0        0
## late OHT-DPN            0       -1        1       0        0        0
## inter DPN-C             0        0        0       0        0        0
## inter OHT-C             0        0        0       0        0        0
## inter OHT-DPN           0        0        0       0        0        0
##               patient4 agentDPN:time48h agentOHT:time48h
## early DPN-C          0                0                0
## early OHT-C          0                0                0
## early OHT-DPN        0                0                0
## late DPN-C           0                1                0
## late OHT-C           0                0                1
## late OHT-DPN         0               -1                1
## inter DPN-C          0                1                0
## inter OHT-C          0                0                1
## inter OHT-DPN        0               -1                1

3.8 Tests

3.8.1 Omnibus test

omnibus<-glmLRT(fit,contrast=t(L))

3.8.2 post-hoc

posthoc<-apply(L,1,function(fit,contrast) glmLRT(fit,contrast=contrast),fit=fit)

3.8.3 stagewise

library(stageR)
## Loading required package: SummarizedExperiment
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following object is masked from 'package:gplots':
## 
##     space
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:purrr':
## 
##     reduce
## Loading required package: GenomeInfoDb
## Loading required package: DelayedArray
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## Loading required package: BiocParallel
## 
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:purrr':
## 
##     simplify
## The following objects are masked from 'package:base':
## 
##     aperm, apply, rowsum
## 
## Attaching package: 'stageR'
## The following object is masked from 'package:methods':
## 
##     getMethod
pConfirmation<-sapply(posthoc,function(x) x$table$PValue)
rownames(pConfirmation)<-rownames(posthoc[[1]]$table)
head(pConfirmation)
##                 early DPN-C early OHT-C early OHT-DPN late DPN-C  late OHT-C
## ENSG00000000003   0.1153673  0.17264930     0.8049828 0.77203502 0.670615526
## ENSG00000000419   0.1213990  0.09430385     0.9044222 0.46280953 0.418234915
## ENSG00000000457   0.2516758  0.30491880     0.8707902 0.23941297 0.600233419
## ENSG00000000460   0.4477164  0.15323789     0.4606658 0.01060669 0.001051306
## ENSG00000000938   0.6031783  0.94232035     0.5678194 0.81523811 0.260569727
## ENSG00000000971   0.1165395  0.03637869     0.5511637 0.14712170 0.021072541
##                 late OHT-DPN inter DPN-C inter OHT-C inter OHT-DPN
## ENSG00000000003    0.8862102  0.17138237 0.195507844     0.9417619
## ENSG00000000419    0.1134814  0.51853379 0.075029272     0.2277421
## ENSG00000000457    0.5131414  0.94994806 0.695912080     0.7223458
## ENSG00000000460    0.4325830  0.20728484 0.185158041     0.9461906
## ENSG00000000938    0.3346114  0.58210219 0.418710601     0.7523296
## ENSG00000000971    0.3333804  0.03214387 0.001794392     0.2639844
pScreen <- omnibus$table$PValue
names(pScreen) <-rownames(omnibus)
stageRObj <- stageR(pScreen=pScreen,pConfirmation=pConfirmation, pScreenAdjusted=FALSE)
stageRObj <- stageWiseAdjustment(object=stageRObj, method="holm", alpha=0.05)
stageWiseRes <- getAdjustedPValues(stageRObj,onlySignificantGenes = TRUE,order = TRUE)
## The returned adjusted p-values are based on a stage-wise testing approach and are only valid for the provided target OFDR level of 5%. If a different target OFDR level is of interest,the entire adjustment should be re-run.
colSums(stageWiseRes<0.05)
##    padjScreen   early DPN-C   early OHT-C early OHT-DPN    late DPN-C 
##           108             6             2             2            18 
##    late OHT-C  late OHT-DPN   inter DPN-C   inter OHT-C inter OHT-DPN 
##             9            10             0             0             0

3.9 Plots

for (i in 1:nrow(L))
{
  sigGenes<-rownames(dge)%in%rownames(stageWiseRes)[stageWiseRes[,i+1]<0.05]
   volcano<- ggplot(posthoc[[i]]$table,aes(x=logFC,y=-log10(PValue),color=sigGenes)) + geom_point() + scale_color_manual(values=c("black","red")) + ggtitle(paste("contrast",names(posthoc)[i]))
print(volcano)
plotSmear(posthoc[[i]],de.tags=rownames(dge)[sigGenes],main=paste("contrast",names(posthoc)[i]))
if(sum(sigGenes)>=2)
print(pheatmap(cpm(dge,log=TRUE)[sigGenes,],labels_col = paste(target$agent,target$time,target$patient,sep="-"),main=paste("contrast",names(posthoc)[i])))
}